import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from skimage import io
from astropy.stats import RipleysKEstimator
import math
from classes import *
import alpha_shape
from lib2d_py3mod_AM import cms, get_eigenvalues, get_labeled_boutons
import warnings
warnings.filterwarnings("ignore")
filename = './dStorm647_Nc82_8.txt'
border=500
#set AZ parameters
alpha_AZ=800
min_cluster_size_AZ=100
min_samples_AZ=25
#set subcluster parameters
alpha_sub=300
min_cluster_size_sub=24
min_samples_sub=6
cluster_selection_method='leaf'
#set minimum A/D count
min_amplitude=12000
#set size filter True or False
size_filter=True
def show_overview(filename):
maskname = filename[:-4]+'_mask.png'
mask_im=io.imread(maskname)
l = get_labeled_boutons(mask_im).T
selection=Selection(filename=filename)
selection.load_data()
selection.get_bins_rapidstorm(spacing=(10,10))
sloc1=selection.get_masked_data(l > 0)
sloc=sloc1.select_between(['Amplitude'],[(min_amplitude,None)])
# calculate clusters
AZclusters=sloc.get_cluster(kind='HDBSCAN',min_cluster_size=min_cluster_size_AZ,min_samples=min_samples_AZ)
AZclusters.fast_analysis(set_data=1)
AZclusters.data=AZclusters.data.rename(columns={'x_mean':'x','y_mean':'y'})
ofig=plt.figure (figsize=(16,16))
cmap = plt.cm.jet
# iterate the first order clusters(AZ clusters), calculate and plot active zone
for j in AZclusters.indices[1:]:
sel = AZclusters.get_group(j)
xi,yi=AZclusters._dataframe.loc[j][['x','y']]
if j>=0:
plt.text(x=xi,y=yi,s=str(j+1),fontsize=20,color='k')
pol_AZ = alpha_shape.get_alpha_edges(sel.data[['x', 'y']].values, alpha=alpha_AZ)
alpha_area_AZ = alpha_shape.get_alpha_area(sel.data[['x', 'y']].values, alpha=alpha_AZ)
# apply size threshold
# all AZs excluded by the following filters will appear without colored alpha shape in the overview image
if size_filter==True:
if alpha_area_AZ < 30000 or alpha_area_AZ > 300000:
continue
locs_per_AZ=sel.data.shape[0]
if locs_per_AZ >= 8000:
continue
AZ_dens=locs_per_AZ/alpha_area_AZ
if AZ_dens >= 0.06:
continue
rgba = cmap(float(j)/AZclusters.indices[-1])
plt.plot(*np.array(pol_AZ[:]).T, color=rgba)
ax=plt.gca()
ax.set_aspect('equal')
plt.title(filename)
plt.scatter(sloc.data.x,sloc.data.y,s=10,c="k",alpha=.1)
plt.gca().invert_yaxis()
def analyze_AZs(filename):
maskname = filename[:-4]+'_mask.png'
name=os.path.split(filename)[1][:-4]
mask_im=io.imread(maskname)
l = get_labeled_boutons(mask_im).T
selection=Selection(filename=filename)
selection.load_data()
selection.get_bins_rapidstorm(spacing=(10,10))
sloc1=selection.get_masked_data(l > 0)
sloc=sloc1.select_between(['Amplitude'],[(min_amplitude,None)])
loc=sloc.data
# calculate AZclusters
AZclusters=sloc.get_cluster(kind='HDBSCAN',min_cluster_size=min_cluster_size_AZ,min_samples=min_samples_AZ)
AZclusters.fast_analysis(set_data=1)
# prepare empty list for data of whole file
subloc=[]
# iterate the first order clusters(AZ clusters), calculate and plot active zone
for j in AZclusters.indices[1:]:
sel = AZclusters.get_group(j)
if j>=0:
# calculate and plot alphashape of active Zone
pol_AZ = alpha_shape.get_alpha_edges(sel.data[['x', 'y']].values, alpha=alpha_AZ)
alpha_area_AZ = alpha_shape.get_alpha_area(sel.data[['x', 'y']].values, alpha=alpha_AZ)
else:
alpha_area_AZ = 0
# apply size threshold
if size_filter==True:
if alpha_area_AZ < 30000 or alpha_area_AZ > 300000:
continue
e1, e2=get_eigenvalues(sel)
circularity=e1/e2
# prepare plot for single AZ
subfig=plt.figure(figsize=(10,10))
cmap = plt.cm.jet
plt.plot(*np.array(pol_AZ[:]).T, color='k')
subclusters=sel.get_cluster(kind='HDBSCAN',min_cluster_size=min_cluster_size_sub,min_samples=min_samples_sub,
cluster_selection_method=cluster_selection_method)
subclusters.fast_analysis(set_data=1)
# prepare subcluster dataframe
subclusters.data=subclusters.data.rename(columns={'x_mean':'x','y_mean':'y'})
subclusters._dataframe['image name']=os.path.split(selection.filename)[1]
subclusters._dataframe['AZ name']=j+1
subclusters._dataframe['no. of all locs/AZ']=sel.data.shape[0]
subclusters._dataframe['AZ alpha-shape area']=alpha_area_AZ
subclusters._dataframe['sc name']=0
subclusters._dataframe['no. locs/sc']=0
subclusters._dataframe['sc alpha-shape area']=0
subclusters._dataframe['e1']=np.nan
subclusters._dataframe['e2']=np.nan
# iterate the subclusters, calculate and plot active zone
for k in subclusters.indices[0:]:
selc = subclusters.get_group(k)
subclusters._dataframe['sc name'].loc[k]=k+1
subclusters._dataframe['no. locs/sc'].loc[k]=selc.data.shape[0]
xi,yi=subclusters._dataframe.loc[k][['x','y']]
if k>=0:
plt.text(x=xi,y=yi,s=str(k+1),fontsize=20,color='k')
pol_sub = alpha_shape.get_alpha_edges(selc.data[['x', 'y']].values, alpha=alpha_sub)
alpha_area_sub = alpha_shape.get_alpha_area(selc.data[['x', 'y']].values, alpha=alpha_sub)
rgba = cmap(float(k)/subclusters.indices[-1])
plt.plot(*np.array(pol_sub[:]).T, color=rgba)
else:
alpha_area_sub = 0
subclusters._dataframe['sc alpha-shape area'].loc[k] = alpha_area_sub
# calculate circularity
subclusters._dataframe['e1'].loc[k]=e1
subclusters._dataframe['e2'].loc[k]=e2
subclusters._dataframe['circularity']=circularity
# calculate com of all subclusters (=socom)
socom=cms(subclusters.data.iloc[1:])
dist_sc=np.sqrt(np.sum((subclusters.data[['x','y']].values-socom)**2,axis=1))
subclusters._dataframe['rd focom from socom']=dist_sc
subclusters.data=subclusters.data.rename(columns={'x':'focom x coordinate','y':'focom y coordinate'})
# propagate cluster properties to localisations
loc=subclusters.group_to_parent(kdims=subclusters.data.keys())
plt.title('label={}'.format(j+1)+' subclusters={}'.format(subclusters.indices.max()+1)+' circularity={}'.format(circularity))
l_c=sloc.select_between(['x','y'],[((socom[0]-border),(socom[0]+border)),((socom[1]-border),(socom[1]+border))])
plt.scatter(l_c.data.x,l_c.data.y,s=10,c="k",alpha=.1)
# plot localizations if no subclusters found
if (subclusters.indices.max()+1)==0:
l_c=loc.loc[(loc["AZ name"]==j+1)]
plt.scatter(l_c['x'],l_c['y'],s=10,c="k",alpha=.1)
ax=plt.gca()
ax.set_aspect('equal')
# plot socom
plt.scatter(*socom,s=500,c='k',marker='x',linewidths=2)
plt.gca().invert_yaxis()
plt.show()
#subfig.savefig((os.path.join(resultsfolder,name))+'_group{}'.format(group)+'_AZ{}'.format(j+1)+'.jpg', dpi=50)
# sort values
loc['no. of sc per AZ']=subclusters.indices.max()+1
loc=loc.sort_values(by='sc name').rename(columns={'x':'loc x coordinate','y':'loc y coordinate'})
loc['AZ dens']=(loc['no. of all locs/AZ']/loc['AZ alpha-shape area'])*1000000
loc=loc[[u'image name',u'AZ name',u'no. of all locs/AZ','AZ dens','Amplitude','circularity',u'sc name',
u'no. locs/sc','no. of sc per AZ','sc alpha-shape area','AZ alpha-shape area',u'rd focom from socom']]
subloc.append(loc)
return subloc
def analyze_AZs_ripley_H(filename):
maskname = filename[:-4]+'_mask.png'
name=os.path.split(filename)[1][:-4]
mask_im=io.imread(maskname)
l = get_labeled_boutons(mask_im).T
selection=Selection(filename=filename)
selection.load_data()
selection.get_bins_rapidstorm(spacing=(10,10))
sloc1=selection.get_masked_data(l > 0)
sloc=sloc1.select_between(['Amplitude'],[(min_amplitude,None)])
loc=sloc.data
# calculate AZclusters
AZclusters=sloc.get_cluster(kind='HDBSCAN',min_cluster_size=min_cluster_size_AZ,min_samples=min_samples_AZ)
AZclusters.fast_analysis(set_data=1)
# prepare empty list for data of whole file
H_list=[]
# iterate the first order clusters(AZ clusters)
for j in AZclusters.indices[1:]:
sel = AZclusters.get_group(j)
alpha_area_AZ = alpha_shape.get_alpha_area(sel.data[['x', 'y']].values, alpha=alpha_AZ)
# apply size threshold
if size_filter==True:
if alpha_area_AZ < 30000 or alpha_area_AZ > 300000:
continue
locs_per_AZ=sel.data.shape[0]
if locs_per_AZ >= 8000:
continue
AZ_dens=locs_per_AZ/alpha_area_AZ
if AZ_dens >= 0.06:
continue
pnts=np.zeros((sel.data.shape[0],2))
pnts[:,0]=sel.data.x
pnts[:,1]=sel.data.y
Kest = RipleysKEstimator(area=alpha_area_AZ)
r = np.linspace(0, 120, 121)
K=Kest(data=pnts,radii=r)
L=np.sqrt(K/math.pi)
H=L-r
H_list.append(H)
return H_list
print (filename)
show_overview(filename)
subloc=analyze_AZs(filename)
loc=pd.concat(subloc)
H_list=analyze_AZs_ripley_H(filename)
./dStorm647_Nc82_8.txt
# load data
sel=Selection(filename=filename)
sel.load_data()
sel=sel.select_between(['Amplitude'],[(12000,None)])
fig=plt.figure (figsize=(16,16))
# 10 nm pixel binning, saturation with 5 localizations per pixel:
plt.hist2d(sel.data.x,sel.data.y,bins=int(sel.data.x.max()/10),cmax=5,cmap=plt.cm.hot)
plt.colorbar()
ax=plt.gca()
ax.set_aspect('equal')
ax.invert_yaxis()
plt.show()
grouped=loc.groupby(['image name','AZ name']).mean()
grouped.mean()
no. of all locs/AZ 1988.920000 AZ dens 15185.247430 Amplitude 52826.083119 circularity 0.656082 sc name 4.352113 no. locs/sc 583.982213 no. of sc per AZ 16.840000 sc alpha-shape area 1187.581958 AZ alpha-shape area 129942.511253 rd focom from socom 78.195753 dtype: float64
grouped
| no. of all locs/AZ | AZ dens | Amplitude | circularity | sc name | no. locs/sc | no. of sc per AZ | sc alpha-shape area | AZ alpha-shape area | rd focom from socom | ||
|---|---|---|---|---|---|---|---|---|---|---|---|
| image name | AZ name | ||||||||||
| dStorm647_Nc82_8.txt | 1 | 3235.0 | 21775.553014 | 54034.571623 | 0.253945 | 5.695518 | 932.327357 | 23.0 | 1062.279074 | 148561.0950 | 110.780512 |
| 2 | 1445.0 | 19244.452154 | 56029.963114 | 0.734783 | 2.658131 | 338.894810 | 10.0 | 1544.014820 | 75086.5750 | 51.159884 | |
| 3 | 3664.0 | 27356.395257 | 60818.662282 | 0.614318 | 6.251365 | 1356.857533 | 26.0 | 433.828226 | 133935.7750 | 60.753186 | |
| 4 | 1791.0 | 12382.159914 | 60213.725516 | 0.908393 | 4.948632 | 343.435511 | 17.0 | 1455.882825 | 144643.5850 | 83.068430 | |
| 5 | 1747.0 | 17355.054868 | 55774.045850 | 0.461874 | 5.088151 | 418.499714 | 16.0 | 1064.677373 | 100662.3150 | 76.125758 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | |
| 77 | 878.0 | 22486.994200 | 48658.147380 | 0.476011 | 1.782460 | 256.519362 | 6.0 | 775.531536 | 39044.7915 | 58.834774 | |
| 78 | 1708.0 | 17616.612771 | 52978.998126 | 0.451550 | 4.377635 | 471.857143 | 16.0 | 845.396249 | 96953.9390 | 98.381222 | |
| 79 | 828.0 | 17245.650683 | 39108.258816 | 0.601208 | 3.492754 | 204.241546 | 7.0 | 2721.772611 | 48012.1055 | 61.606624 | |
| 91 | 1780.0 | 18631.106750 | 53623.701180 | 0.818557 | 3.741011 | 464.919101 | 15.0 | 1295.807414 | 95539.1445 | 80.157512 | |
| 94 | 2069.0 | 18292.426236 | 50980.337603 | 0.489795 | 5.998550 | 543.884485 | 17.0 | 1587.202244 | 113106.9205 | 104.020735 |
75 rows × 10 columns
loc[loc['AZ name']==2].mean()
AZ name 2.000000 no. of all locs/AZ 1445.000000 AZ dens 19244.452154 Amplitude 56029.963114 circularity 0.734783 sc name 2.658131 no. locs/sc 338.894810 no. of sc per AZ 10.000000 sc alpha-shape area 1544.014820 AZ alpha-shape area 75086.575000 rd focom from socom 51.159884 dtype: float64
H_list_avg=sum(H_list)/len(H_list)
Kest = RipleysKEstimator(area=300000)
r = np.linspace(0, 120, 121)
K_poisson=Kest.poisson(r)
L_poisson=np.sqrt(K_poisson/math.pi)
H_poisson=L_poisson-r
fig = plt.figure(figsize=(6,5))
r = np.linspace(0, 120, 121)
p1 = plt.plot(r, H_list_avg, c='k')
p2 = plt.plot(r, H_poisson,color='g', linestyle='dashed')
plt.legend((p1[0], p2[0]), ('AZs', 'poisson distribution'))
plt.ylabel('H(r)')
plt.xlabel('r [nm]')
plt.xlim(0,120)
plt.show()
# make scatter plot of whole file
sel=Selection(filename=filename)
sel.load_data()
# in the following set point size and color
plt.figure(figsize=(10,10))
plt.scatter(sel.data.x,sel.data.y,s=10e-5,c='k')
ax=plt.gca()
ax.set_aspect('equal')
ax.invert_yaxis()
# make small selection of 10000x10000 nm size
# to get background analysis window of 100 µm2 size
import matplotlib.patches as patches
xmin=2000
xmax=12000
ymin=20000
ymax=30000
sel2=sel.select_between(['x','y'],[(xmin,xmax),(ymin,ymax)])
plt.figure(figsize=(10,10))
plt.scatter(sel.data.x,sel.data.y,s=10e-5,c='k')
ax=plt.gca()
rect=patches.Rectangle((xmin,ymin),width=10000,height=10000,fill=0,color='b')
ax.add_patch(rect)
ax.set_aspect('equal')
ax.invert_yaxis()
# now calculate localizations per µm2
background = sel2.data.shape[0]/100
print (background)
38.26